We need the following packages:
required_packages <- c("readxl",
"purrr",
"dplyr",
"stringr",
"tidyr",
"AMR",
"RColorBrewer",
"magrittr")
Installing those that are not already installed on the system:
for (package in required_packages) {
if (!package %in% installed.packages()) install.packages(package)
}
Loading the required packages:
devnull <- lapply(required_packages, require, character.only = TRUE)
Date and type of antimicrobial use per farm:
amu <- read_excel("AMU_KietStudy_Marc.xlsx")
It is a data frame that looks like this:
amu
## # A tibble: 90 × 5
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 K06 30 4 1 sulfadimidine
## 2 K06 30 4 1 sulfaquinoxaline
## 3 K06 35 3 2 doxycyline
## 4 K06 35 3 2 tylosin
## 5 K07 1 4 1 oxytetracyline
## 6 K07 1 4 1 colistin
## 7 K07 36 1 2 ampicillin
## 8 K07 36 1 2 colistin
## 9 K07 38 3 3 flumequine
## 10 K07 38 3 4 enrofloxacin
## # … with 80 more rows
Antimicrobial class of the antimicrobial(s) against which each of the resistance genes is active:
classes <- read_excel("FarmvsClass.xlsx")
It’s a data frame that looks like this:
classes
## # A tibble: 81 × 2
## EvaGreen_Name Antimicrobial_Class
## <chr> <chr>
## 1 aac3-liacde Aminoglycoside
## 2 aac6-aph2 Aminoglycoside
## 3 aac6-lb Aminoglycoside
## 4 aac6-li Aminoglycoside
## 5 aac6-lla Aminoglycoside
## 6 aadA Aminoglycoside
## 7 aadE Aminoglycoside
## 8 aadE-like Aminoglycoside
## 9 acrA Multidrug
## 10 acrB Multidrug
## # … with 71 more rows
Note that each resistance gene can be active against antimicrobials of more than one antimicrobial class.
The names of the farms:
(farms <- paste0("K", c(formatC(6:9, 1, flag = "0"), 11:26)))
## [1] "K06" "K07" "K08" "K09" "K11" "K12" "K13" "K14" "K15" "K16" "K17" "K18"
## [13] "K19" "K20" "K21" "K22" "K23" "K24" "K25" "K26"
Let’s start by looking at the resistance genes concentrations in chicken only (maybe we’ll look at the rat data later on):
genes <- farms %>%
map(read_excel, path = "KietAnalysisData.xlsx") %>%
map(filter, Group.1 %in% c("Chicken-Control", "Chicken-Treatment")) %>%
map(select, -Group.1, -TotalLog2Value) %>%
setNames(farms)
In the excel file, the gene concentrations for a given farm (with
control and treatment experiments) are in a different tab. This
structure is mapped in the genes object that is a list of
data frames, each of them containing the genes concentrations for a
given farm. As an example the data for the first farm look like
this:
genes[[1]]
## # A tibble: 10 × 84
## FarmID Group Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 K06 Control K06-BC Before 4.74 7.42 5.75
## 2 K06 Control K06-07C 7 1.96 5.55 2.21
## 3 K06 Control K06-14C 14 0.205 4.55 0.138
## 4 K06 Control K06-60C 60 1.46 6.01 1.69
## 5 K06 Control K06-EC End 0.856 5.57 1.08
## 6 K06 Treatment K06-B Before 3.65 6.20 5.18
## 7 K06 Treatment K06-07 7 4.00 5.85 3.93
## 8 K06 Treatment K06-14 14 1.96 5.01 0.803
## 9 K06 Treatment K06-60 60 5.36 3.07 5.93
## 10 K06 Treatment K06-E End 5.72 5.49 4.44
## # … with 77 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## # aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## # `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## # arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## # blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## # blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## # cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …
Let’s check that the names of the columns are the same for all the farms:
genes %>%
map_df(names) %>%
apply(1, unique) %>%
.[map_int(., length) > 1]
## [[1]]
## [1] "FarmID" "K09"
There is a problem with with the FarmID variable that is
called K09 is one of the farms. Let’s fix this. As the
names of the variables seem OK in the first farm:
(correct_names <- names(genes[[1]]))
## [1] "FarmID" "Group" "Sample_Name" "SamplingDay" "aac3-liacde"
## [6] "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla" "aadA"
## [11] "aadE" "aadE-like" "acrA" "acrB" "acrF"
## [16] "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll" "arnA"
## [21] "bacA_1" "bacA_2" "blaAMPC" "blaCMY2" "blaCTXM"
## [26] "blaDHA" "blaPSE" "blaSHV" "blaTEM" "blaZ"
## [31] "cat" "cblA" "cepA" "cepA2" "cfr"
## [36] "cfr2" "cfxA" "cmlA1-01" "cmr" "dfrA"
## [41] "dfrF" "ermA" "ermB" "ermC" "floR"
## [46] "folA" "fosB" "fox5" "macB" "mcr-1"
## [51] "mcr-2" "mcr-3" "mdtF" "mdtL" "mdtO"
## [56] "mecA" "mefa10" "mefA3" "mfsA" "oprD"
## [61] "oqxA" "oqxB" "qacA" "qacC" "qacE"
## [66] "qnrA" "qnrB" "qnrS" "sat4" "spc"
## [71] "strB" "sul1" "sul2" "sul3" "tetB"
## [76] "tetC-01" "tetM" "tetO" "tetQ" "tetW"
## [81] "tolC" "vanA" "vanB" "vatA"
Let’s use them as variables names for all the farms:
genes %<>% map(setNames, correct_names)
Note also that not all farms have a “Before” measurement in the control group:
(tmp <- genes %>%
map(~ .x %>% filter(SamplingDay == "Before") %>% select(Group)) %>%
.[map_int(., nrow) < 2] %>%
unlist())
## K14.Group K15.Group K16.Group K17.Group K18.Group K20.Group
## "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" "Treatment"
## K21.Group K22.Group K23.Group
## "Treatment" "Treatment" "Treatment"
For the farms where the “Before” measurement is missing in the
control group, let’s just use the “Before” measurement of the treatment
group. For that, we need the following function where x is
a data frame for one farm:
control_before <- function(x) {
y <- filter(x, SamplingDay == "Before")
y$Group <- "Control"
y$Sample_Name <- NA
rbind(x, y)
}
Let’s now use this function on all the farms that needed to be fixed:
# the names of the farms that need to be fixed:
farms_to_fix <- tmp %>%
names() %>%
str_remove(".Group")
# the fixed data for these farms:
fixed_farms <- genes %>%
extract(farms_to_fix) %>%
map(control_before)
# updating the original data with the fixed data:
genes[farms_to_fix] <- fixed_farms
Here we compute aggregates of resistance genes, these aggregates being the sums of all the resistance genes but also the sum of all the resistance genes by class of antimicrobial against which they are effective. Indeed, we will perform the analyses in 3 different ways: per resistant gene, for all resistant genes altogether, and per class of antimicrobials against which the resistant genes are active.
Let’s start by retrieving the names of resistance genes:
(resistance_genes <- setdiff(names(genes[[1]]),
c("FarmID", "Group", "Sample_Name", "SamplingDay")))
## [1] "aac3-liacde" "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla"
## [6] "aadA" "aadE" "aadE-like" "acrA" "acrB"
## [11] "acrF" "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll"
## [16] "arnA" "bacA_1" "bacA_2" "blaAMPC" "blaCMY2"
## [21] "blaCTXM" "blaDHA" "blaPSE" "blaSHV" "blaTEM"
## [26] "blaZ" "cat" "cblA" "cepA" "cepA2"
## [31] "cfr" "cfr2" "cfxA" "cmlA1-01" "cmr"
## [36] "dfrA" "dfrF" "ermA" "ermB" "ermC"
## [41] "floR" "folA" "fosB" "fox5" "macB"
## [46] "mcr-1" "mcr-2" "mcr-3" "mdtF" "mdtL"
## [51] "mdtO" "mecA" "mefa10" "mefA3" "mfsA"
## [56] "oprD" "oqxA" "oqxB" "qacA" "qacC"
## [61] "qacE" "qnrA" "qnrB" "qnrS" "sat4"
## [66] "spc" "strB" "sul1" "sul2" "sul3"
## [71] "tetB" "tetC-01" "tetM" "tetO" "tetQ"
## [76] "tetW" "tolC" "vanA" "vanB" "vatA"
As a reminder classes looks like this:
classes
## # A tibble: 81 × 2
## EvaGreen_Name Antimicrobial_Class
## <chr> <chr>
## 1 aac3-liacde Aminoglycoside
## 2 aac6-aph2 Aminoglycoside
## 3 aac6-lb Aminoglycoside
## 4 aac6-li Aminoglycoside
## 5 aac6-lla Aminoglycoside
## 6 aadA Aminoglycoside
## 7 aadE Aminoglycoside
## 8 aadE-like Aminoglycoside
## 9 acrA Multidrug
## 10 acrB Multidrug
## # … with 71 more rows
Let’s add a category All to the
Antimicrobial_Class variable. This category will simply
correspond to all the resistance genes:
classes %<>%
bind_rows(data.frame(EvaGreen_Name = resistance_genes,
Antimicrobial_Class = "All"))
The antimicrobial classes against which each resistance gene is active now looks like:
(classes_names <- unique(classes$Antimicrobial_Class))
## [1] "Aminoglycoside" "Multidrug" "Polymyxin" "Other"
## [5] "Beta-Lactam" "Phenicol" "Sulfonamide" "MLSB"
## [9] "Quinolone" "Tetracycline" "Glycopeptide" "All"
Note that Other and All categories are
actually not antimicrobials classes per se. The following function
calculates the sum of the genes concentration for a given class
am_class for the data frame gene_farm of a
given farm:
sum_by_class <- function(am_class, gene_farm) {
classes %>%
filter(Antimicrobial_Class == am_class) %>%
pull(EvaGreen_Name) %>%
extract(gene_farm, .) %>%
rowSums()
}
The following function uses the one above and adds as many variables as antimicrobial classes (12) to the data frame of a given farm:
add_sums_by_class <- function(gene_farm) {
gene_farm %>%
map_dfc(classes_names, sum_by_class, .) %>%
setNames(classes_names) %>%
bind_cols(gene_farm, .)
}
Let’s use this function to add the sums of the genes concentrations to the data frames of all the farms:
genes %<>% map(add_sums_by_class)
As a reminder the data on AMU looks like this:
amu
## # A tibble: 90 × 5
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 K06 30 4 1 sulfadimidine
## 2 K06 30 4 1 sulfaquinoxaline
## 3 K06 35 3 2 doxycyline
## 4 K06 35 3 2 tylosin
## 5 K07 1 4 1 oxytetracyline
## 6 K07 1 4 1 colistin
## 7 K07 36 1 2 ampicillin
## 8 K07 36 1 2 colistin
## 9 K07 38 3 3 flumequine
## 10 K07 38 3 4 enrofloxacin
## # … with 80 more rows
In order to work with AMU by antimicrobial class, we need to generate
the antimicrobial class of each of the antimicrobial of the
AAI column. For that, we can use the
ab_group() function of the AMR package, but,
for consistency with the classes data frame, we need to
generate a hash table that provide the correspondence between the
spelling of the given by the AMR::ab_group() function and
the spelling used in our classes data frame:
# AMR package: classes data frame:
hash <- c("Aminoglycosides" = "Aminoglycoside",
"Amphenicols" = "Phenicol",
"Antifungals/antimycotics" = "Antifungals/antimycotics",
"Beta-lactams/penicillins" = "Beta-Lactam",
"Cephalosporins (3rd gen.)" = "Beta-Lactam",
"Macrolides/lincosamides" = "MLSB",
"Other antibacterials" = "Other",
"Polymyxins" = "Polymyxin",
"Quinolones" = "Quinolone",
"Tetracyclines" = "Tetracycline",
"Trimethoprims" = "Sulfonamide")
Let’s generate the correspondence data frame:
(amu_classes <- amu %>%
select(AAI) %>%
unique() %>%
mutate(AMR_package = map_chr(AAI, ab_group),
classes_df = hash[AMR_package]))
## Warning: in `as.ab()`: these values could not be coerced to a valid antimicrobial
## ID: "bromhexine".
## # A tibble: 25 × 3
## AAI AMR_package classes_df
## <chr> <chr> <chr>
## 1 sulfadimidine Trimethoprims Sulfonamide
## 2 sulfaquinoxaline Other antibacterials Other
## 3 doxycyline Tetracyclines Tetracycline
## 4 tylosin Macrolides/lincosamides MLSB
## 5 oxytetracyline Tetracyclines Tetracycline
## 6 colistin Polymyxins Polymyxin
## 7 ampicillin Beta-lactams/penicillins Beta-Lactam
## 8 flumequine Quinolones Quinolone
## 9 enrofloxacin Quinolones Quinolone
## 10 ceftiofur Cephalosporins (3rd gen.) Beta-Lactam
## # … with 15 more rows
Let’s check for missing values:
filter(amu_classes, if_any(.fns = is.na))
## # A tibble: 1 × 3
## AAI AMR_package classes_df
## <chr> <chr> <chr>
## 1 bromhexine <NA> <NA>
Let’s fix it manually:
amu_classes[amu_classes$AAI == "bromhexine", "classes_df"] <- "Mucoactive agent"
Finally, we can use amu_classes to make another hash
table:
hash <- with(amu_classes, setNames(classes_df, AAI))
And we can use this new hash table to add the antimicrobial class to
the amu data frame:
(amu %<>% mutate(class = hash[AAI]))
## # A tibble: 90 × 6
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI class
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 K06 30 4 1 sulfadimidine Sulfonamide
## 2 K06 30 4 1 sulfaquinoxaline Other
## 3 K06 35 3 2 doxycyline Tetracycline
## 4 K06 35 3 2 tylosin MLSB
## 5 K07 1 4 1 oxytetracyline Tetracycline
## 6 K07 1 4 1 colistin Polymyxin
## 7 K07 36 1 2 ampicillin Beta-Lactam
## 8 K07 36 1 2 colistin Polymyxin
## 9 K07 38 3 3 flumequine Quinolone
## 10 K07 38 3 4 enrofloxacin Quinolone
## # … with 80 more rows
Here, the categories used in the class variable of the
amu data frame are consistent with the categories used in
the Antimicrobial_Class of the classes data
frame.
This function duplicates the first row of a data frame and paste it at the end of the data frame:
duplicate_first <- function(x) {
bind_rows(x, x[1, ])
}
This function remove the last row of a data frame:
remove_last <- function(x) {
x[-nrow(x), ]
}
This function get the last value of the “y” variable of a data frame:
get_last <- function(x) {
unlist(x[nrow(x), "y"])
}
Preparing the data for plotting:
tmp <- amu %>%
mutate(end = AgeUse_Day + DurationOfUse_days) %>%
group_by(FarmID) %>%
group_split() %>%
map(arrange, AgeUse_Day, end) %>%
map(duplicate_first) %>%
bind_rows() %>%
mutate(y = row_number()) %>%
group_by(FarmID) %>%
group_split()
last_values <- tmp %>%
map_int(get_last)
tmp %<>%
map(remove_last) %>%
bind_rows()
classes_names <- tmp %>%
pull(class) %>%
unique()
hash <- classes_names %>%
length() %>%
brewer.pal("Set3") %>%
setNames(classes_names)
tmp %<>% mutate(color = hash[class])
Plotting the data:
lwd_val <- 2
y_val <- c(.16347245, .86858097)
opar <- par(plt = c(.05, .7, y_val))
plot(NA, xlim = c(-7, 120), ylim = c(0, (max(tmp$y) + 1)),
type = "n", xlab = "age of flock (days)", ylab = "", axes = FALSE,
xaxs = "i", yaxs = "i")
box(bty = "o")
axis(1)
with(tmp, segments(AgeUse_Day, y, end, y, col = color, lwd = lwd_val))
abline(h = last_values, col = "grey")
abline(v = 0)
abline(v = seq(7, 120, 7), col = "grey")
tmp %>%
group_by(FarmID) %>%
summarise(y = mean(y)) %$%
text(-3.5, y, FarmID, cex = .5)
par(plt = c(.71, 1, y_val), new = TRUE)
plot(1:10, type = "n", axes = FALSE, ann = FALSE)
legend("center", legend = names(hash), col = hash, lwd = lwd_val, lty = 1, bty = "n")
par(opar)
The time points:
genes %>%
map(pull, SamplingDay) %>%
unlist() %>%
unique()
## [1] "Before" "7" "14" "60" "End" "After" "30" "90"
Generating new time points and ordering the data chronologically:
genes %<>% map(mutate,
SamplingDay2 = as.integer(recode(SamplingDay,
Before = "-7",
After = "0",
End = "120"))) %>%
map(arrange, Group, SamplingDay2) # making sure that data are arranged chronologically
A utility function for the function
plot_gene_concentration() that follows after. This function
adds the dots and lines to a plot:
plot_data <- function(x, gene, col, lwd = 2, lty = 3) {
lines2 <- function(...) lines(..., col = col, lwd = lwd)
nrows <- nrow(x)
x2 <- x[-c(1, nrows), ]
x3 <- x[1:2, ]
x4 <- x[(nrows - 1):nrows, ]
points(x$SamplingDay2, x[[gene]], col = col, lwd = lwd)
lines2(x2$SamplingDay2, x2[[gene]])
lines2(x3$SamplingDay2, x3[[gene]], lty = lty)
lines2(x4$SamplingDay2, x4[[gene]], lty = lty)
}
Another utility function for the function
plot_gene_concentration() that follows after. This function
plot the axes, their range and labels:
plot_frame <- function(..., type = "n") {
plot(type = type, xlim = c(-10, 120), axes = FALSE, ...)
ats <- c(-7, seq(0, 120, 20))
lbs <- ats
lbs[1] <- "before"
lbs[length(lbs)] <- "end"
axis(1, ats, lbs)
axis(2)
}
Let’s now define colors that we will use throughout for the control and treatment experiments:
exp_col <- c(treatment = "red", control = "blue")
The following function uses the two previous functions to plots the
gene concentrations as a function of time for a given resistance gene
gene in a given farm farm. And this for both
control and treatment experiments:
plot_gene_concentration <- function(farm, gene, text, col = exp_col) {
farm_dataset <- genes[[farm]]
plot_frame(farm_dataset$SamplingDay2, farm_dataset[[gene]],
xlab = "time after treatment (days)", ylab = "gene concentration")
farm_dataset %>%
filter(Group == "Control") %>%
plot_data(gene, col["control"])
farm_dataset %>%
filter(Group == "Treatment") %>%
plot_data(gene, col["treatment"])
mtext(text)
abline(v = 0, lwd = 2)
}
The text argument is a character string to use as the
title of the plot. Plotting aac3-liacde for control and
treatment experiments in farm K06:
plot_gene_concentration(farms[1], resistance_genes[1], resistance_genes[1])
legend("right", legend = names(exp_col), col = exp_col, lty = 1, lwd = 2, bty = "n")
Number of columns we want and some graph tuning:
ncols <- 4
plt_val <- c(.13, .92, .22, .9)
Plotting all the genes for the first farm:
opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols), plt = plt_val)
walk(resistance_genes, ~ plot_gene_concentration(farms[1], .x, .x))
par(opar)
Plotting all the farms for the first gene:
opar <- par(mfrow = c(ceiling(length(farms) / ncols), ncols), plt = plt_val)
walk(farms, ~ plot_gene_concentration(.x, resistance_genes[1], .x))
par(opar)
Here we want to plot on a single graph the data from all the farms
(with control and treatment experiments), with one line per experiment
time series. For that, we first need to standardize the genes
concentration in order to ensure that dynamics are comparable across
experiments and farms. The standardization is done by taking the
Before measurement as a reference value.
The following function allows to standardize the data for all the resistance genes from one given experiment (i.e. either control or treatment in one given farm):
standardize_by_before_ <- function(x) {
rbind(rep(1, length(resistance_genes)),
sweep(as.matrix(x[x$SamplingDay != "Before", resistance_genes]), 2,
unlist(x[x$SamplingDay == "Before", resistance_genes]), `/`))
}
Here x is the subset of the data frame of genes
concentrations of a given farm that correspond either to the control of
the treatment experiment. This function below uses the one above and
standardizes the data for all the resistance genes for both the control
and treatment experiments of a given farm:
standardize_by_before <- function(x) {
x[, resistance_genes] <- rbind(standardize_by_before_(filter(x, Group == "Control")),
standardize_by_before_(filter(x, Group == "Treatment")))
x
}
Now here x is the data frame of genes concentrations of
a given farm. Let’s use this function to standardize the gene
concentrations for all the farms:
genes_standardized <- map(genes, standardize_by_before)
Let’s now plot the concentrations of a given gene across the farms on a single plot. Let’s first define the following function that draws the layout the plot:
plot_frame2 <- function(...) {
plot_frame(..., xlab = "time (days)", ylab = "standardized genes concentrations")
}
The same on with a log scale y-axis:
plot_frame2log <- function(...) {
plot_frame(..., xlab = "time (days)", ylab = "log(standardized genes concentrations)")
}
Let’s also define a function that plot vertical and horizontal lines and adds title:
lines_title <- function(x, h = 0) {
mtext(x)
abline(h = h, lwd = 2)
abline(v = 0, lwd = 2)
}
Let’s now explore various ways to plot the data.
Now, we can start exploring various options of plotting the data. Let’s start by considering this transformation function:
transformation <- function(x) {
mutate_at(x, 3, log10)
}
This function replaces infinity values in the third column of data
frame x by some specified value:
replace_infinity <- function(x, infinity = -1000) {
mutate_at(x, 3, ~ replace(.x, is.infinite(.x), infinity))
}
With the 4 functions above being defined, we can proceed and explore plotting:
plot_gene_all_farms <- function(gene, infinity = -1000, h = 0, pf = plot_frame2) {
tmp <- genes_standardized %>%
map(extract, c("Group", "SamplingDay2", gene)) %>%
map(transformation)
tmp2 <- bind_rows(tmp)
pf(tmp2[[2]], tmp2[[3]])
tmp %>%
map(replace_infinity, infinity) %>%
map(~ split(.x, .x$Group)) %>%
unlist(recursive = FALSE) %>%
map(mutate, col = exp_col[(Group == "Control") + 1]) %>%
sample() %>% # we want to shuffle the treatments for unbiased visualization
walk(~ plot_data(.x, gene, col = .x$col))
lines_title(gene, h)
}
Note here that we transform the standardized genes concentrations.
Depending on the chosen transformation, it may generate infinity values.
For visualization purpose we replace these values by the big number
infinity in the list of the function’s arguments. Let’s try
it on one resistance gene:
plot_gene_all_farms(resistance_genes[1], pf = plot_frame2log)
legend("bottomright", legend = names(exp_col), col = exp_col, lty = 1, lwd = 2, bty = "n")
Let’s try an alternative visualization, using this following function for box-plots:
boxplot2 <- function(x, eps, col, ...) {
boxplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE, axes = FALSE,
boxwex = 2.5, col = adjustcolor(col, .5), outline = FALSE, ...)
}
Here is the alternative visualization:
plot_gene_all_farms <- function(gene, col = exp_col, eps = 1.5, h = 0,
transform = I, plot_frame = plot_frame2, plot_fct = boxplot2) {
tmp <- genes_standardized %>%
bind_rows() %>%
filter(SamplingDay != "Before") %>%
select(c("Group", "SamplingDay2", gene)) %>%
transform()
plot_frame(tmp[[2]], tmp[[3]])
control <- tmp %>%
filter(Group == "Control") %>%
arrange(SamplingDay2) # because boxplot() and vioplot() are not used with the formula option
points(jitter(control[[2]] - eps), control[[3]], col = col["control"])
plot_fct(control, -eps, col = col["control"])
treatment <- tmp %>%
filter(Group == "Treatment") %>%
arrange(SamplingDay2) # because boxplot() and vioplot() are not used with the formula option
points(jitter(treatment[[2]] + eps), treatment[[3]], col = col["treatment"])
plot_fct(treatment, eps, col = col["treatment"])
lines_title(gene, h)
}
Let’s try it:
plot_gene_all_farms(resistance_genes[1], h = 1)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(gene)` instead of `gene` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
legend("topright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
But a log scale might be more appropriate:
plot_gene_all_farms(resistance_genes[1], transform = transformation, plot_frame = plot_frame2log)
legend("bottomright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
Let’s try a violin plot instead, by using this function:
vioplot2 <- function(x, eps, color, ...) {
vioplot::vioplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE,
axes = FALSE, fill = color, lineCol = color, border = color,
wex = 4, col = adjustcolor(color, .5), ...)
}
Let’s try plotting the data with the violin plot:
plot_gene_all_farms(resistance_genes[1], h = 1, plot_fct = vioplot2)
legend("topright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
plot_frame3 <- function(...) {
plot_frame(..., xlab = "time (days)",
ylab = "difference in standardized genes concentrations")
}
Let’s plot the difference between treatment and control for each farm. For that, we need the following function:
plot_differences <- function(gene) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control)
tmp %>%
bind_rows() %$%
plot_frame3(SamplingDay2, difference)
walk(tmp, with, lines(SamplingDay2, difference, col = "green", lwd = 2))
lines_title(gene)
}
Let’s try it:
plot_differences(resistance_genes[1])
Let’s consider this function for box-plots and violin plots:
plot_differences <- function(gene, plot_fct = boxplot2) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows() %>%
arrange(SamplingDay2) %>% # for boxplot() and vioplot()
select(Control, SamplingDay2, difference) # for boxplot2() and vioplot2()
with(tmp, plot_frame3(jitter(SamplingDay2), difference, col = "green", type = "p"))
plot_fct(tmp, 0, col = "green")
lines_title(gene)
}
Let’s try it:
plot_differences(resistance_genes[1], boxplot2)
Let’s look at the violin alternative:
plot_differences(resistance_genes[1], vioplot2)
Let’s consider another option.
plot_differences_quantiles <- function(gene, col_lines = "steelblue3", col_area = "lightblue") {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows() %>%
select(SamplingDay2, difference)
with(tmp, plot_frame3(jitter(SamplingDay2), difference, col = col_area, type = "p"))
x_val <- sort(unique(tmp$SamplingDay2))
tmp %>%
group_by(SamplingDay2) %>%
group_split() %>%
map(~ quantile(.x$difference, c(.25, .5, .75), na.rm = TRUE)) %>%
bind_rows() %>%
mutate(x_val = x_val) %>%
with({
polygon(c(x_val, rev(x_val)), c(`25%`, rev(`75%`)),
col = adjustcolor(col_area, .2), border = col_lines, lty = 3)
points(x_val, `50%`, lwd = 2, col = col_lines)
arrows(x_val, `25%`, x_val, `75%`, .1, 90, 3, lwd = 2, col = col_lines)
lines(x_val, `50%`, lty = 2, col = col_lines)
})
lines_title(gene)
}
Let’s try it:
plot_differences_quantiles(resistance_genes[1])